package com.offcn.bigdata.spark.homework

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Computes, per sex: the total number of males and of females, the maximum male and
  * female height, and the minimum male and female height.
  */
object HomeWork1 {
    // Local import so the object is self-contained; Try guards the numeric parsing below.
    import scala.util.Try

    /**
      * Parses one whitespace-separated record of the form "<id> <sex> <height>".
      *
      * The line is trimmed first so leading/trailing whitespace does not create an
      * empty extra field.
      *
      * @param line raw input record
      * @return the parsed People, or None when the line does not contain exactly three
      *         fields or when id/height are not valid integers
      */
    private def parsePeople(line: String): Option[People] =
        line.trim.split("\\s+") match {
            case Array(id, sex, height) =>
                // toInt throws NumberFormatException on bad input; treat that as a malformed record.
                Try(People(id.toInt, sex, height.toInt)).toOption
            case _ =>
                None
        }

    /**
      * Combines two partial aggregates that belong to the same sex.
      *
      * @param left  first partial aggregate
      * @param right second partial aggregate
      * @return aggregate with summed count, the larger maxHeight and the smaller minHeight
      */
    private def mergeResults(left: Result, right: Result): Result =
        Result(
            left.sex,
            left.count + right.count,
            math.max(left.maxHeight, right.maxHeight),
            math.min(left.minHeight, right.minHeight)
        )

    /**
      * Entry point: builds a local SparkContext, aggregates the sample records by sex and
      * prints, for each sex, the head count, maximum height and minimum height.
      *
      * @param args command-line arguments (unused)
      */
    def main(args: Array[String]): Unit = {
        val conf = new SparkConf().setMaster("local[*]").setAppName("Homework1")
        val sc = new SparkContext(conf)
        try {
            val lines = sc.parallelize(List(
                "1    F    170",
                "2    M    178",
                "3    M    174",
                "4    F    165",
                "5    M    179",
                "6    F    160"
            ))

            // Malformed records are dropped here instead of being mapped to a null sentinel.
            val peoples: RDD[People] = lines.flatMap(line => parsePeople(line).toList)

            // Seed every person as a one-element aggregate, then fold the aggregates per sex.
            val result = peoples
                    .map(people => (people.sex, Result(people.sex, 1, people.height, people.height)))
                    .reduceByKey((left, right) => mergeResults(left, right))

            // There is one row per distinct sex, so collecting to the driver is cheap; it also
            // ensures the output is printed by the driver rather than on executors.
            result.collect().foreach { case (sex, result) =>
                println(s"性别为${sex}的人数：${result.count}, 最高身高：${result.maxHeight}, 最低身高： ${result.minHeight}")
            }
        } finally {
            // Always release the context, even when the job fails.
            sc.stop()
        }
    }
}
/**
  * Aggregated statistics for one sex.
  *
  * @param sex       sex label the statistics belong to
  * @param count     number of people aggregated
  * @param maxHeight largest height among the aggregated people
  * @param minHeight smallest height among the aggregated people
  */
case class Result(
    sex: String,
    count: Int,
    maxHeight: Int,
    minHeight: Int
)
/**
  * A single person record.
  *
  * @param id     numeric identifier of the person
  * @param sex    sex label of the person
  * @param height height of the person
  */
case class People(
    id: Int,
    sex: String,
    height: Int
)
